library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
The mpg data frame:
mpg
## # A tibble: 234 x 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto(l… f 18 29 p comp…
## 2 audi a4 1.8 1999 4 manual… f 21 29 p comp…
## 3 audi a4 2 2008 4 manual… f 20 31 p comp…
## 4 audi a4 2 2008 4 auto(a… f 21 30 p comp…
## 5 audi a4 2.8 1999 6 auto(l… f 16 26 p comp…
## 6 audi a4 2.8 1999 6 manual… f 18 26 p comp…
## 7 audi a4 3.1 2008 6 auto(a… f 18 27 p comp…
## 8 audi a4 quat… 1.8 1999 4 manual… 4 18 26 p comp…
## 9 audi a4 quat… 1.8 1999 4 auto(l… 4 16 25 p comp…
## 10 audi a4 quat… 2 2008 4 manual… 4 20 28 p comp…
## # … with 224 more rows
# Print a column-by-column summary of the mpg data frame.
summary(mpg)
## manufacturer model displ year
## Length:234 Length:234 Min. :1.600 Min. :1999
## Class :character Class :character 1st Qu.:2.400 1st Qu.:1999
## Mode :character Mode :character Median :3.300 Median :2004
## Mean :3.472 Mean :2004
## 3rd Qu.:4.600 3rd Qu.:2008
## Max. :7.000 Max. :2008
## cyl trans drv cty
## Min. :4.000 Length:234 Length:234 Min. : 9.00
## 1st Qu.:4.000 Class :character Class :character 1st Qu.:14.00
## Median :6.000 Mode :character Mode :character Median :17.00
## Mean :5.889 Mean :16.86
## 3rd Qu.:8.000 3rd Qu.:19.00
## Max. :8.000 Max. :35.00
## hwy fl class
## Min. :12.00 Length:234 Length:234
## 1st Qu.:18.00 Class :character Class :character
## Median :24.00 Mode :character Mode :character
## Mean :23.44
## 3rd Qu.:27.00
## Max. :44.00
# Basic scatterplot: displ on the x axis, hwy on the y axis.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy))
ggplot(): create a coordinate system that you can add layers to.
geom_point(): add a layer of points to the plot.
Template: ggplot(data = <DATA>) + <GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))
To map an aesthetic to a variable, associate the name of the aesthetic to the name of the variable inside aes(). ggplot2 will automatically assign a unique level of the aesthetic to each unique value of the variable, a process known as scaling. ggplot2 will also add a legend that explains which levels correspond to which values.
color
# Map a third variable (class) to different point aesthetics, then set
# aesthetics manually. The `mapping` argument is spelled out in full: the
# abbreviated `map =` only worked through R's partial argument matching.

# Colour each point by class.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = class))

# Transparency by class.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, alpha = class))
## Warning: Using alpha for a discrete variable is not advised.

# Shape by class.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, shape = class))
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).

# Set (rather than map) an aesthetic: arguments given outside aes() are
# constants applied to every point.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy), color = "blue")

# Same fixed size and colour, with two different stroke widths.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy),
    size = 5, color = "white", stroke = 1
  )
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy),
    size = 5, color = "white", stroke = 2
  )
The + sign has to come at the end of the line, not the start.
One way to add additional variables is with aesthetics. Another way, particularly useful for categorical variables, is to split your plot into facets, subplots that each display one subset of the data.
To facet your plot by a single variable, use facet_wrap(). The first argument of facet_wrap() should be a formula, which you create with ~ followed by a variable name. The variable that you pass to facet_wrap() should be discrete.
# Facet by a single variable: one panel per class, arranged in two rows.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_wrap(~class, nrow = 2)
# To facet your plot on the combination of two variables, add facet_grid() to
# your plot call. The first argument of facet_grid() is also a formula. This
# time the formula should contain two variable names separated by a ~.
# NOTE(review): the opening words of this sentence were missing from the
# source; they are restored from the presumed original tutorial text -- confirm.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_grid(drv ~ cyl)
# If you prefer not to facet in the rows or columns dimension, use a . instead
# of a variable name.
# NOTE(review): the opening words of this sentence were missing from the
# source; they are restored from the presumed original tutorial text -- confirm.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_grid(. ~ cyl)
# Facet on rows only: drv on the left of the formula, `.` as the column
# placeholder.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_grid(drv ~ .)
# Base plot object reused by the layered examples that follow.
# Assigned with `<-` (the idiomatic R assignment operator) instead of `=`.
gg <- ggplot(data = mpg)

# The same x/y mapping drawn with two different geoms.
gg + geom_point(mapping = aes(x = displ, y = hwy))
gg + geom_smooth(mapping = aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# One smooth per drv value, distinguished by linetype.
gg + geom_smooth(mapping = aes(x = displ, y = hwy, linetype = drv))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Two layers: points coloured by drv, with the per-drv smooths on top.
gg +
  geom_point(mapping = aes(x = displ, y = hwy, color = drv)) +
  geom_smooth(mapping = aes(x = displ, y = hwy, linetype = drv))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Mapping supplied once in ggplot(); neither layer specifies its own.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Smooth grouped by drv via the group aesthetic.
gg +
  geom_smooth(mapping = aes(x = displ, y = hwy, group = drv))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Smooth coloured by drv, with the legend switched off.
gg +
  geom_smooth(
    mapping = aes(x = displ, y = hwy, color = drv),
    show.legend = FALSE
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Layer-specific mapping: colour applies to the points only; the smooth uses
# just the x/y mapping given in ggplot().
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Layer-specific data: the smooth is fitted to the subcompact rows only and
# drawn without its confidence band (se = FALSE).
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(
    data = filter(mpg, class == "subcompact"),
    se = FALSE
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Colour mapped in ggplot(), so both the points and the smooth use it.
ggplot(mpg, aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Single smooth with a fixed blue colour.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(color = "blue", se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Smooth grouped by drv; no colour aesthetic is mapped.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(mapping = aes(group = drv), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Same call as the first plot in this group.
ggplot(mpg, aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Points coloured by drv; the smooth layer maps only x and y.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = drv)) +
  geom_smooth(mapping = aes(x = displ, y = hwy), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Points coloured by drv plus one smooth per drv, distinguished by linetype.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = drv)) +
  geom_smooth(mapping = aes(linetype = drv), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Points coloured by drv with a fixed stroke width of 2.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, color = drv),
    stroke = 2
  )
# Print the diamonds tibble.
diamonds
## # A tibble: 53,940 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.290 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
## 7 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47
## 8 0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49
## 10 0.23 Very Good H VS1 59.4 61 338 4 4.05 2.39
## # … with 53,930 more rows
# Bar chart with cut mapped to the x axis.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut))
# You can recreate the previous plot using stat_count() instead of geom_bar():
# NOTE(review): the opening words of this sentence were missing from the
# source; they are restored from the presumed original tutorial text -- confirm.
ggplot(data = diamonds) +
  stat_count(mapping = aes(x = cut))